
/*=======================================================================
Creator: Jingyuan Wang, jingyuanwang@u.northwestern.edu
Date created: 				03/18/2019
Date last modified: 		
Purpose: 
		Use Misato energy price for the 48 countries to generate a region-level
		foreign prices
==========================================================================*/


foreach var in "exports" "imports" {
*************************************************************************
* 				PART I. Import the import data							*
*************************************************************************

use "$Schott/us_`var'_naics-cntry-89-17.dta", clear
drop if year > 2015

replace `var' = 0 if `var' == .

*************************************************************************
* 				PART II. merge with misato								*
*************************************************************************
* 1. merge-in sector code (consistent with misato)
gen naics_5digit = substr(naics_str, 1,5)
destring naics_5digit, replace

* change some 2012 code to 2007 version
replace naics_5digit = 31133 if naics_5digit == 31135
replace naics_5digit = 31221 if naics_5digit == 31223
replace naics_5digit = 31523 if naics_5digit == 31524
replace naics_5digit = 31529 if naics_5digit == 31528
replace naics_5digit = 33329 if naics_5digit == 33324

merge m:1 naics_5digit using  "$crosswalks/crosswalk_censusnaics_iea_allindustries.dta"
drop if _merge == 2
drop _merge
order naics_5digit naics_3digit sector, a(naics_str)

replace sector = "Textile and leather" if sector == "Textile and leather"
replace sector = "Manufacturing"  if sector == "Non-specified (Industry)"

* 2. merge with misato
rename isocode_uniform isoalpha3code
keep if year >= 1995 & year <= 2015
merge m:1 isoalpha3code year sector using "$misato/VEPLdata_final_update2017.dta"
drop if _merge == 2
* _merge == 2: 
* (1) sector  "Construction" "Industry" "Manufacturing" "Mining and quarrying" are not included in NAICS311-339
* (2) US prices and some tiny countries which do not trade with US
order country, a(isoalpha3code)


gen flag_price = 0
*replace flag_price = 1 if _merge == 3
replace flag_price = 1 if FEPI_fw2010 != .
drop _merge
	
*************************************************************************
* 				PART III. generate region-level prices					*
*************************************************************************

* 1. separate all the countries into 7 regions
replace region = "Europe" if isoalpha3code == "AUT"
tab region_wb region if region_wb == "Europe & Central Asia"
replace region_wb = "Central Asia" if region_wb == "Europe & Central Asia" & region == "Asia"
replace region_wb = "Europe" if region_wb == "Europe & Central Asia" & region == "Europe"

rename isoalpha3code iso
drop region
gen region = region_wb
replace region = "Central America" if region_wb == "North America"
replace region = "CAN" if iso == "CAN"
replace region = "Asia & Pacific" if region_wb == "East Asia & Pacific" | region_wb == "Central Asia" | region_wb == "South Asia"
replace region = "South America" if region_wb == "Latin America & Caribbean"
replace region = "Africa" if region_wb == "Sub-Saharan Africa"

egen region_code = group(region)

* 2. keep only countries with energy prices available every year

* 2.1 get price available countries
preserve
	use "$misato/VEPLdata_final_update2017.dta", replace
	keep FEPI_fw2010 isoalpha3code year sector
	drop if FEPI_fw2010 == .
	bysort isoalpha3code sector: gen N = _N
	drop if N != 21 & isoalpha3code != "ZAF"
	keep isoalpha3code sector
	drop if sector ==  "Construction" | sector == "Industry" | sector == "Mining and quarrying"
	*replace sector = "Non-specified (Industry)" if sector == "Manufacturing"
	duplicates drop
	rename isoalpha3code iso
	drop if iso == "USA"
	gen flag_price = 1
	tempfile availablecountry
	save `availablecountry.dta', replace
restore

drop flag_price
merge m:1 iso sector using `availablecountry.dta'
drop _merge

keep if flag_price == 1

* 2.2 weighted average the prices
keep region naics_str naics_5digit naics_3digit sector country year iso `var' FEPI_fw2010
replace FEPI_fw2010 = exp(FEPI_fw2010)

collapse (sum) `var' (first) FEPI_fw2010, by(sector country region year iso) 
label var `var' "`var' [$ b]"
label var FEPI_fw2010 "2010 weight Energy Price Index 2010 USD per TOE, real"
egen sector_code = group(sector)

* generate country level 21-year averqage share
bysort region sector: egen total_`var' = total(`var')
bysort country region sector: egen country_`var' = total(`var')

gen countryshare = country_`var'/total_`var'
keep sector year iso country region FEPI_fw2010 countryshare sector_code

gen price_index = FEPI_fw2010 * countryshare
collapse (sum) price_index countryshare, by(sector sector_code year region)
drop countryshare
* checked that collapsed countryshare ~= 1
label var price_index "2010 weight Energy Price Index 2010 USD per TOE, real"


* South Africa 2015 prices are missing. I'll fill it with 2014 South Africa prices.
* According to the time-series plot, I believe this is ok.
sort region sector year
egen group = group(region sector)
sort group year
bysort group : gen n = _n
gen price2014 = price_index if n == 20
bysort group: egen price_2014 = total(price2014)
replace price_index = price_2014 if n == 21 & region == "Africa" & price_index == 0
drop price2014 price_2014 n group

* save
drop sector_code
save "$energy_price/`var'priceindex.dta", replace

}
